import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import plotly.express as px
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
import numpy as np
from sklearn_extra.cluster import KMedoids
from sklearn.datasets import make_blobs
import requests
import folium
dataset = pd.read_csv('Country.csv')
dataset.head(16)
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 90.2 | 10.0 | 7.58 | 44.9 | 1610 | 9.440 | 56.2 | 5.82 | 553 |
| 1 | Albania | 16.6 | 28.0 | 6.55 | 48.6 | 9930 | 4.490 | 76.3 | 1.65 | 4090 |
| 2 | Algeria | 27.3 | 38.4 | 4.17 | 31.4 | 12900 | 16.100 | 76.5 | 2.89 | 4460 |
| 3 | Angola | 119.0 | 62.3 | 2.85 | 42.9 | 5900 | 22.400 | 60.1 | 6.16 | 3530 |
| 4 | Antigua and Barbuda | 10.3 | 45.5 | 6.03 | 58.9 | 19100 | 1.440 | 76.8 | 2.13 | 12200 |
| 5 | Argentina | 14.5 | 18.9 | 8.10 | 16.0 | 18700 | 20.900 | 75.8 | 2.37 | 10300 |
| 6 | Armenia | 18.1 | 20.8 | 4.40 | 45.3 | 6700 | 7.770 | 73.3 | 1.69 | 3220 |
| 7 | Australia | 4.8 | 19.8 | 8.73 | 20.9 | 41400 | 1.160 | 82.0 | 1.93 | 51900 |
| 8 | Austria | 4.3 | 51.3 | 11.00 | 47.8 | 43200 | 0.873 | 80.5 | 1.44 | 46900 |
| 9 | Azerbaijan | 39.2 | 54.3 | 5.88 | 20.7 | 16000 | 13.800 | 69.1 | 1.92 | 5840 |
| 10 | Bahamas | 13.8 | 35.0 | 7.89 | 43.7 | 22900 | -0.393 | 73.8 | 1.86 | 28000 |
| 11 | Bahrain | 8.6 | 69.5 | 4.97 | 50.9 | 41100 | 7.440 | 76.0 | 2.16 | 20700 |
| 12 | Bangladesh | 49.4 | 16.0 | 3.52 | 21.8 | 2440 | 7.140 | 70.4 | 2.33 | 758 |
| 13 | Barbados | 14.2 | 39.5 | 7.97 | 48.7 | 15300 | 0.321 | 76.7 | 1.78 | 16000 |
| 14 | Belarus | 5.5 | 51.4 | 5.61 | 64.5 | 16200 | 15.100 | 70.4 | 1.49 | 6030 |
| 15 | Belgium | 4.5 | 76.4 | 10.70 | 74.7 | 41100 | 1.880 | 80.0 | 1.86 | 44400 |
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 167 entries, 0 to 166 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 167 non-null object 1 child_mort 167 non-null float64 2 exports 167 non-null float64 3 health 167 non-null float64 4 imports 167 non-null float64 5 income 167 non-null int64 6 inflation 167 non-null float64 7 life_expec 167 non-null float64 8 total_fer 167 non-null float64 9 gdpp 167 non-null int64 dtypes: float64(7), int64(2), object(1) memory usage: 13.2+ KB
dataset.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| child_mort | 167.0 | 38.270060 | 40.328931 | 2.6000 | 8.250 | 19.30 | 62.10 | 208.00 |
| exports | 167.0 | 41.108976 | 27.412010 | 0.1090 | 23.800 | 35.00 | 51.35 | 200.00 |
| health | 167.0 | 6.815689 | 2.746837 | 1.8100 | 4.920 | 6.32 | 8.60 | 17.90 |
| imports | 167.0 | 46.890215 | 24.209589 | 0.0659 | 30.200 | 43.30 | 58.75 | 174.00 |
| income | 167.0 | 17144.688623 | 19278.067698 | 609.0000 | 3355.000 | 9960.00 | 22800.00 | 125000.00 |
| inflation | 167.0 | 7.781832 | 10.570704 | -4.2100 | 1.810 | 5.39 | 10.75 | 104.00 |
| life_expec | 167.0 | 70.555689 | 8.893172 | 32.1000 | 65.300 | 73.10 | 76.80 | 82.80 |
| total_fer | 167.0 | 2.947964 | 1.513848 | 1.1500 | 1.795 | 2.41 | 3.88 | 7.49 |
| gdpp | 167.0 | 12964.155689 | 18328.704809 | 231.0000 | 1330.000 | 4660.00 | 14050.00 | 105000.00 |
# Quantidade de paises
dataset.country.nunique(dropna = True)
167
plt.figure(figsize=(15,5))
sns.boxplot(data=dataset,orient='h')
<AxesSubplot: >
q1 = dataset.income.quantile(0.25)
q3 = dataset.income.quantile(0.75)
IQR = q3 - q1
lower_limit = q1 - 1.5 * IQR
upper_limit = q3 + 1.5 * IQR
k1=dataset[dataset["income"]>upper_limit]
print("outliers on right side in income is \n",k1["income"],"\n")
dataset=dataset[dataset["income"]<upper_limit]
sns.boxplot(dataset["income"])
outliers on right side in income is 23 80600 82 75200 91 91700 114 62300 123 125000 133 72100 145 55500 157 57600 Name: income, dtype: int64
<AxesSubplot: >
q1 = dataset.income.quantile(0.25)
q3 = dataset.income.quantile(0.75)
IQR = q3 - q1
lower_limit = q1 - 1.5 * IQR
upper_limit = q3 + 1.5 * IQR
k1=dataset[dataset["gdpp"]>upper_limit]
print("outliers on right side in gdpp is \n",k1["gdpp"],"\n")
dataset=dataset[dataset["gdpp"]<upper_limit]
sns.boxplot(dataset["gdpp"])
outliers on right side in gdpp is 7 51900 8 46900 29 47400 44 58000 53 46200 73 48700 110 50300 144 52100 159 48400 Name: gdpp, dtype: int64
<AxesSubplot: >
plt.figure(figsize=(15,5))
sns.boxplot(data=dataset,orient='h')
<AxesSubplot: >
q1 = dataset.gdpp.quantile(0.25)
q3 = dataset.gdpp.quantile(0.75)
IQR = q3 - q1
lower_limit = q1 - 1.5 * IQR
upper_limit = q3 + 1.5 * IQR
k1=dataset[dataset["income"] > upper_limit]
data2=dataset[dataset["gdpp"] < upper_limit]
sns.boxplot(dataset["gdpp"])
<AxesSubplot: >
plt.figure(figsize=(15,5))
sns.boxplot(data=data2,orient='h')
<AxesSubplot: >
#tamanho do dataset
data2.shape
(137, 10)
data2
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 90.2 | 10.0 | 7.58 | 44.9 | 1610 | 9.44 | 56.2 | 5.82 | 553 |
| 1 | Albania | 16.6 | 28.0 | 6.55 | 48.6 | 9930 | 4.49 | 76.3 | 1.65 | 4090 |
| 2 | Algeria | 27.3 | 38.4 | 4.17 | 31.4 | 12900 | 16.10 | 76.5 | 2.89 | 4460 |
| 3 | Angola | 119.0 | 62.3 | 2.85 | 42.9 | 5900 | 22.40 | 60.1 | 6.16 | 3530 |
| 4 | Antigua and Barbuda | 10.3 | 45.5 | 6.03 | 58.9 | 19100 | 1.44 | 76.8 | 2.13 | 12200 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 162 | Vanuatu | 29.2 | 46.6 | 5.25 | 52.7 | 2950 | 2.62 | 63.0 | 3.50 | 2970 |
| 163 | Venezuela | 17.1 | 28.5 | 4.91 | 17.6 | 16500 | 45.90 | 75.4 | 2.47 | 13500 |
| 164 | Vietnam | 23.3 | 72.0 | 6.84 | 80.2 | 4490 | 12.10 | 73.1 | 1.95 | 1310 |
| 165 | Yemen | 56.3 | 30.0 | 5.18 | 34.4 | 4480 | 23.60 | 67.5 | 4.67 | 1310 |
| 166 | Zambia | 83.1 | 37.0 | 5.89 | 30.9 | 3280 | 14.00 | 52.0 | 5.40 | 1460 |
137 rows × 10 columns
# Quantidade de paises
data2.country.nunique(dropna = True)
#data2=data2.drop(['Cluster'],axis=1)
#occur = data2.groupby(['country']).size()
#occur
137
pd.set_option("display.float", "{:.2f}".format)
data2.describe()
| child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | |
|---|---|---|---|---|---|---|---|---|---|
| count | 137.00 | 137.00 | 137.00 | 137.00 | 137.00 | 137.00 | 137.00 | 137.00 | 137.00 |
| mean | 45.51 | 38.13 | 6.36 | 46.90 | 10257.39 | 8.88 | 68.42 | 3.20 | 5499.37 |
| std | 41.11 | 22.14 | 2.37 | 21.36 | 9371.18 | 11.21 | 8.37 | 1.56 | 5661.36 |
| min | 3.20 | 0.11 | 1.97 | 0.07 | 609.00 | -4.21 | 32.10 | 1.23 | 231.00 |
| 25% | 14.60 | 22.80 | 4.84 | 31.40 | 2660.00 | 2.61 | 62.20 | 1.92 | 1170.00 |
| 50% | 28.10 | 32.90 | 5.89 | 44.90 | 7710.00 | 6.35 | 70.40 | 2.65 | 3450.00 |
| 75% | 64.40 | 50.50 | 7.76 | 58.90 | 15400.00 | 12.10 | 74.70 | 4.56 | 8080.00 |
| max | 208.00 | 153.00 | 14.20 | 154.00 | 45400.00 | 104.00 | 80.40 | 7.49 | 23400.00 |
# variáveis que serão utilizadas
p = data2.hist(figsize = (10,12))
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='inflation',
title='Paises com alta inflação'
)
fig.show()
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='child_mort',
title='Paises com indice alto por morte infantil'
)
fig.show()
## paises com cores mais claras, são paises com alta mortalidade infantil
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='life_expec',
title='Paises com alta expectativa de vida'
)
fig.show()
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='health',
title='Paises com Indice de saúde'
)
fig.show()
data2
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 90.20 | 10.00 | 7.58 | 44.90 | 1610 | 9.44 | 56.20 | 5.82 | 553 |
| 1 | Albania | 16.60 | 28.00 | 6.55 | 48.60 | 9930 | 4.49 | 76.30 | 1.65 | 4090 |
| 2 | Algeria | 27.30 | 38.40 | 4.17 | 31.40 | 12900 | 16.10 | 76.50 | 2.89 | 4460 |
| 3 | Angola | 119.00 | 62.30 | 2.85 | 42.90 | 5900 | 22.40 | 60.10 | 6.16 | 3530 |
| 4 | Antigua and Barbuda | 10.30 | 45.50 | 6.03 | 58.90 | 19100 | 1.44 | 76.80 | 2.13 | 12200 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 162 | Vanuatu | 29.20 | 46.60 | 5.25 | 52.70 | 2950 | 2.62 | 63.00 | 3.50 | 2970 |
| 163 | Venezuela | 17.10 | 28.50 | 4.91 | 17.60 | 16500 | 45.90 | 75.40 | 2.47 | 13500 |
| 164 | Vietnam | 23.30 | 72.00 | 6.84 | 80.20 | 4490 | 12.10 | 73.10 | 1.95 | 1310 |
| 165 | Yemen | 56.30 | 30.00 | 5.18 | 34.40 | 4480 | 23.60 | 67.50 | 4.67 | 1310 |
| 166 | Zambia | 83.10 | 37.00 | 5.89 | 30.90 | 3280 | 14.00 | 52.00 | 5.40 | 1460 |
137 rows × 10 columns
data2.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| child_mort | 137.00 | 45.51 | 41.11 | 3.20 | 14.60 | 28.10 | 64.40 | 208.00 |
| exports | 137.00 | 38.13 | 22.14 | 0.11 | 22.80 | 32.90 | 50.50 | 153.00 |
| health | 137.00 | 6.36 | 2.37 | 1.97 | 4.84 | 5.89 | 7.76 | 14.20 |
| imports | 137.00 | 46.90 | 21.36 | 0.07 | 31.40 | 44.90 | 58.90 | 154.00 |
| income | 137.00 | 10257.39 | 9371.18 | 609.00 | 2660.00 | 7710.00 | 15400.00 | 45400.00 |
| inflation | 137.00 | 8.88 | 11.21 | -4.21 | 2.61 | 6.35 | 12.10 | 104.00 |
| life_expec | 137.00 | 68.42 | 8.37 | 32.10 | 62.20 | 70.40 | 74.70 | 80.40 |
| total_fer | 137.00 | 3.20 | 1.56 | 1.23 | 1.92 | 2.65 | 4.56 | 7.49 |
| gdpp | 137.00 | 5499.37 | 5661.36 | 231.00 | 1170.00 | 3450.00 | 8080.00 | 23400.00 |
# Matriz de correlação
corr_matrix = data2.corr()
fig, ax = plt.subplots(figsize=(15, 15))
ax = sns.heatmap(corr_matrix,
annot=True,
linewidths=0.5,
fmt=".2f",
cmap="YlGnBu");
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
C:\Users\BlueShift\AppData\Local\Temp\ipykernel_7816\2477129727.py:3: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
(9.5, -0.5)
x=data2.drop(['country'],axis=1).values
array([[9.02e+01, 1.00e+01, 7.58e+00, ..., 5.82e+00, 5.53e+02, 1.00e+00],
[1.66e+01, 2.80e+01, 6.55e+00, ..., 1.65e+00, 4.09e+03, 1.00e+00],
[2.73e+01, 3.84e+01, 4.17e+00, ..., 2.89e+00, 4.46e+03, 0.00e+00],
...,
[2.33e+01, 7.20e+01, 6.84e+00, ..., 1.95e+00, 1.31e+03, 1.00e+00],
[5.63e+01, 3.00e+01, 5.18e+00, ..., 4.67e+00, 1.31e+03, 1.00e+00],
[8.31e+01, 3.70e+01, 5.89e+00, ..., 5.40e+00, 1.46e+03, 1.00e+00]])
#normalização
scaler = StandardScaler()
X = scaler.fit_transform(x)
x
'''
normalized_df = (x-x.mean())/x.std()
normalized_df'''
'\nnormalized_df = (x-x.mean())/x.std()\nnormalized_df'
#Quanto maior o valor de WCSS, o agrupamento será menos preciso.
#Quanto menor o valor de WCSSS, o agrupamento será mais preciso.
#kmeans.intertia_ tem o valor WCSS para o número atual de clusters.
#init = 'K-means++' é a inicialização do centroide para cada cluster. (Escolhendo o ponto de dados aleatório certo em cada cluster.)
Wcss=[]
for i in range(1,11):
kmeans=KMeans(n_clusters=i,init='k-means++',random_state=9)
kmeans.fit(x)
Wcss.append(kmeans.inertia_)
print(Wcss)
[16302714565.573353, 5271125573.8398, 2486196021.565976, 1500330532.5287132, 1008471886.0701281, 674744053.0271779, 546427395.4781845, 454192475.71572864, 400608402.4926262, 344858346.21710336]
# Escolha do número de cluster
sns.set()
plt.plot(range(1,11),Wcss)
plt.title('The Elbow Point Graph')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()
kmeans=KMeans(n_clusters=3,init='k-means++',random_state=9)
y=kmeans.fit_predict(x)
y
array([1, 1, 0, 1, 0, 0, 1, 0, 2, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 2, 0, 1, 1, 1, 2, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1,
1, 0, 0, 1, 1, 2, 0, 0, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 0, 2, 0, 0, 1, 1, 2, 1, 0, 0, 1,
2, 2, 1, 0, 2, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
1, 0, 1, 1, 1])
n_clusters = 3
plt.figure(figsize=(8,8))
for s in np.arange(1,2):
kmeans=KMeans(n_clusters=3,init='k-means++',max_iter = 100, random_state=9)
#model = KMeans(n_clusters=n_clusters, init="k-means++")
pred = kmeans.fit_predict(X)
plt.figure(figsize=(20,10))
for i in range(0, n_clusters):
plt.scatter(X[pred == i, 0], X[pred == i, 1], s=50, label="Cluster %d" % i)
# plt.scatter(X[:,0],X[:,1], c=pred)
plt.scatter(kmeans.cluster_centers_[:,0], kmeans.cluster_centers_[:,1], s = 200, c = 'black', label='centroids')
plt.title("Clusters")
plt.legend()
plt.show()
<Figure size 800x800 with 0 Axes>
import numpy as np
k_fit = kmeans.fit(x)
## Qual pais esta perto do cluster
k_fit.transform(x)
array([[16796.11746056, 3054.0401433 , 36657.22186935],
[ 7898.09997051, 5999.0303086 , 27733.28970699],
[ 5545.37763952, 8933.88921504, 25091.42672774],
[11600.87854944, 2232.25889556, 31448.34063079],
[ 4385.87266998, 17982.25783918, 15695.37947381],
[ 2879.98521118, 16623.92306551, 16994.73080394],
[11056.79501624, 2693.19143891, 30922.64343941],
[ 3111.81173158, 12305.16132595, 21793.96204877],
[27572.47927536, 41278.63324167, 8177.2317867 ],
[15976.39754809, 2230.75735718, 35841.19914027],
[ 7104.34318068, 17829.20895297, 18051.84270192],
[ 2918.03835485, 12555.62849826, 21519.06767322],
[ 9473.6230537 , 4283.87250397, 29332.19903691],
[16512.01192901, 2775.18827098, 36371.68002449],
[11858.46422775, 2106.42985638, 31715.52835312],
[12808.59089586, 1084.03378649, 32673.74893475],
[ 7762.62195935, 6014.85431476, 27625.4905621 ],
[ 3862.01091879, 9994.99512784, 23675.68821066],
[ 2796.98897084, 13751.47288807, 20274.77637683],
[ 2275.4683255 , 12014.58835712, 21749.9721985 ],
[16941.49565514, 3206.04080577, 36800.32767813],
[17690.40080004, 3954.96677989, 37548.46723816],
[15893.32476707, 2148.26237692, 35758.28298557],
[15508.97136055, 1785.28633913, 35367.43972335],
[11766.18012758, 2029.10641389, 31620.32827936],
[17477.36008947, 3753.47027208, 37332.24027003],
[16347.95000513, 2618.4007964 , 36206.03469335],
[ 5112.21081324, 18632.19587597, 15113.5360167 ],
[ 7948.41894668, 5822.44698148, 27812.68736424],
[ 5909.42137708, 7856.23712604, 25760.02374645],
[16863.8048185 , 3146.04591263, 36718.3182455 ],
[17775.37983196, 4053.23542957, 37629.26464896],
[12602.67833734, 1173.0470071 , 32462.07911114],
[ 3244.97056254, 10692.02842001, 22965.07214735],
[15527.45681527, 1791.9963858 , 35387.95156837],
[ 6022.19483038, 19551.32247998, 14224.54985466],
[16285.35216191, 29890.23114661, 4730.84441785],
[ 6149.28625526, 7625.236536 , 26010.33096045],
[ 8045.36145795, 5708.88862231, 27911.71937405],
[ 8941.65979014, 5571.75219244, 28642.29116708],
[10674.78122089, 3150.99826458, 30536.99825627],
[19344.44561387, 33051.26205858, 2453.22001199],
[16996.10864938, 3255.57004834, 36856.61342987],
[ 8646.03381289, 22308.35436853, 11403.14789867],
[10278.11192963, 3469.93566172, 30143.64879677],
[ 785.01464332, 12995.7508363 , 20615.33263287],
[16748.25285358, 3005.34493742, 36609.70402022],
[11168.33625949, 2608.20460784, 31034.49496861],
[15161.94318135, 1418.27268662, 35024.60048772],
[ 5202.62888417, 8754.72282731, 24941.36511515],
[11255.376368 , 2541.52687316, 31120.71471939],
[17114.96838149, 3395.66689021, 36969.31185823],
[16990.10081327, 3254.15625925, 36849.02538421],
[11889.27641359, 1866.8645788 , 31750.46609673],
[16838.53869032, 3109.05048522, 36696.00603119],
[ 7414.42481632, 21152.2631136 , 12460.34578051],
[13990.45735479, 603.93644026, 33855.69179677],
[ 9684.78866161, 4264.89719909, 29525.02007349],
[ 2718.3664292 , 13853.18001769, 20272.52774004],
[ 5633.56810959, 8753.67710994, 25228.34043543],
[ 9206.49284519, 4578.23609935, 29055.09046745],
[ 8513.08808267, 5427.41108128, 28343.49539006],
[ 3944.33440735, 17306.79415153, 16574.68146126],
[15835.72984753, 2091.07081299, 35698.39372086],
[16241.20226735, 2637.04616061, 36078.81927587],
[15613.49680222, 1872.01896138, 35478.97582259],
[14465.72683481, 879.20782288, 34331.38319782],
[ 3181.80008544, 16814.30337128, 16826.22891077],
[ 168.47876931, 13825.40750109, 19792.50676712],
[15821.6544647 , 2097.48438971, 35679.58875751],
[17699.18746088, 3972.78157181, 37554.45333543],
[13807.08741737, 27236.19921657, 8110.10014098],
[ 5809.06727307, 19554.93613418, 14057.50851597],
[ 6485.54618265, 7533.54253175, 26263.86280203],
[17056.51806722, 3313.58691639, 36917.84079834],
[17346.69426043, 3617.43269079, 37203.27072101],
[ 4944.1610687 , 18222.96205366, 15807.66656454],
[ 5951.802814 , 8041.1547024 , 25684.69248761],
[16493.70711932, 2752.84852737, 36354.6382527 ],
[17179.68904281, 30684.78960205, 4998.4433097 ],
[14994.56200877, 1254.8021057 , 34859.96867229],
[ 982.21083205, 13060.38477621, 20597.06764004],
[14189.8833536 , 1344.48350902, 34002.59889511],
[14267.19685527, 527.18181596, 34132.12628247],
[10536.771697 , 3455.77785129, 30379.2455573 ],
[ 3130.16543536, 10768.94770343, 22910.29324751],
[11482.9358138 , 2290.2767644 , 31349.27988676],
[17463.98552108, 3736.03992427, 37320.00604073],
[14766.72266451, 1136.82485617, 34632.46589699],
[ 8565.92058627, 5253.25910338, 28400.4203603 ],
[16448.10013461, 2701.70021657, 36311.82967586],
[17589.54022731, 3860.33384029, 37445.73894948],
[12844.60279459, 915.49175552, 32709.50504255],
[30926.53853757, 44496.2300869 , 12282.81133101],
[14269.39008234, 910.01548938, 34131.93882209],
[ 1152.6358325 , 12658.06603423, 20970.95523217],
[10551.11585854, 3229.15373079, 30416.69915559],
[ 7337.39887385, 6416.77961962, 27203.65875018],
[12567.56325917, 1286.70770847, 32432.98787725],
[ 6721.35044285, 20464.34355782, 13146.62833769],
[17481.46908642, 30750.36857278, 6567.22826625],
[ 1792.37671005, 14866.02841549, 18909.93004889],
[ 7160.19666847, 20713.3155156 , 13229.35434049],
[17016.782534 , 3283.1505121 , 36875.07147524],
[12080.99753784, 1846.81619871, 31920.38951667],
[31020.78228566, 44588.31726261, 12382.83514272],
[16079.22266979, 2346.65266171, 35938.40316731],
[ 4946.72877976, 9061.02978937, 24699.7867312 ],
[ 4629.87075867, 18350.01451882, 15301.06624888],
[17211.41842968, 3472.78992672, 37071.04866205],
[11845.92239773, 25502.71273485, 8323.03380713],
[19135.9268088 , 32469.37724591, 5849.24334536],
[16289.7709484 , 2630.81725764, 36135.52150393],
[ 4479.83961028, 9344.12000764, 24289.67234302],
[19386.3566541 , 32953.49278722, 3722.11969912],
[ 9767.05737341, 4320.61110165, 29584.58329628],
[ 6803.95460551, 7044.4555924 , 26616.09600324],
[14808.56478073, 1065.52419213, 34671.4174841 ],
[ 2062.00250072, 11740.3026955 , 21875.75716901],
[16270.70097147, 2525.45329597, 36133.49080487],
[16306.20065958, 2560.34530128, 36169.28707421],
[ 4692.78265987, 9693.50991964, 24240.19443821],
[15274.47026889, 2977.47506582, 34968.99123521],
[17175.62683791, 3441.78438107, 37033.72604727],
[12412.68334773, 1731.06642752, 32235.0374336 ],
[ 7500.90325826, 6457.04898502, 27315.15871658],
[ 2542.86645891, 16234.62273343, 17383.65916067],
[ 7679.87649516, 6141.85427752, 27532.55247421],
[16835.86452967, 3097.84378198, 36695.59512251],
[10259.0424303 , 3640.19562812, 30111.28265909],
[ 3099.25856516, 16192.80900234, 17619.95815404],
[14117.34328139, 575.30935928, 33983.46606883],
[14497.36045572, 1715.14514926, 34290.47636648],
[ 4565.51148999, 16782.34688097, 17558.81607933],
[13945.20101898, 661.7743349 , 33809.49770229],
[13953.57010393, 656.2380199 , 33817.9910999 ],
[14896.39389145, 1155.14969948, 34758.74091593]])
np.min(k_fit.transform(x), axis=1)
array([ 3054.0401433 , 5999.0303086 , 5545.37763952, 2232.25889556,
4385.87266998, 2879.98521118, 2693.19143891, 3111.81173158,
8177.2317867 , 2230.75735718, 7104.34318068, 2918.03835485,
4283.87250397, 2775.18827098, 2106.42985638, 1084.03378649,
6014.85431476, 3862.01091879, 2796.98897084, 2275.4683255 ,
3206.04080577, 3954.96677989, 2148.26237692, 1785.28633913,
2029.10641389, 3753.47027208, 2618.4007964 , 5112.21081324,
5822.44698148, 5909.42137708, 3146.04591263, 4053.23542957,
1173.0470071 , 3244.97056254, 1791.9963858 , 6022.19483038,
4730.84441785, 6149.28625526, 5708.88862231, 5571.75219244,
3150.99826458, 2453.22001199, 3255.57004834, 8646.03381289,
3469.93566172, 785.01464332, 3005.34493742, 2608.20460784,
1418.27268662, 5202.62888417, 2541.52687316, 3395.66689021,
3254.15625925, 1866.8645788 , 3109.05048522, 7414.42481632,
603.93644026, 4264.89719909, 2718.3664292 , 5633.56810959,
4578.23609935, 5427.41108128, 3944.33440735, 2091.07081299,
2637.04616061, 1872.01896138, 879.20782288, 3181.80008544,
168.47876931, 2097.48438971, 3972.78157181, 8110.10014098,
5809.06727307, 6485.54618265, 3313.58691639, 3617.43269079,
4944.1610687 , 5951.802814 , 2752.84852737, 4998.4433097 ,
1254.8021057 , 982.21083205, 1344.48350902, 527.18181596,
3455.77785129, 3130.16543536, 2290.2767644 , 3736.03992427,
1136.82485617, 5253.25910338, 2701.70021657, 3860.33384029,
915.49175552, 12282.81133101, 910.01548938, 1152.6358325 ,
3229.15373079, 6416.77961962, 1286.70770847, 6721.35044285,
6567.22826625, 1792.37671005, 7160.19666847, 3283.1505121 ,
1846.81619871, 12382.83514272, 2346.65266171, 4946.72877976,
4629.87075867, 3472.78992672, 8323.03380713, 5849.24334536,
2630.81725764, 4479.83961028, 3722.11969912, 4320.61110165,
6803.95460551, 1065.52419213, 2062.00250072, 2525.45329597,
2560.34530128, 4692.78265987, 2977.47506582, 3441.78438107,
1731.06642752, 6457.04898502, 2542.86645891, 6141.85427752,
3097.84378198, 3640.19562812, 3099.25856516, 575.30935928,
1715.14514926, 4565.51148999, 661.7743349 , 656.2380199 ,
1155.14969948])
np.min(k_fit.transform(x), axis=1).sum()
501604.0977535491
mag_df= pd.DataFrame(
zip(
np.min(k_fit.transform(x),axis=1),
k_fit.labels_
), columns=['distancia', 'cluster']
)
mag_df.head(20)
| distancia | cluster | |
|---|---|---|
| 0 | 3054.04 | 1 |
| 1 | 5999.03 | 1 |
| 2 | 5545.38 | 0 |
| 3 | 2232.26 | 1 |
| 4 | 4385.87 | 0 |
| 5 | 2879.99 | 0 |
| 6 | 2693.19 | 1 |
| 7 | 3111.81 | 0 |
| 8 | 8177.23 | 2 |
| 9 | 2230.76 | 1 |
| 10 | 7104.34 | 0 |
| 11 | 2918.04 | 0 |
| 12 | 4283.87 | 1 |
| 13 | 2775.19 | 1 |
| 14 | 2106.43 | 1 |
| 15 | 1084.03 | 1 |
| 16 | 6014.85 | 1 |
| 17 | 3862.01 | 0 |
| 18 | 2796.99 | 0 |
| 19 | 2275.47 | 0 |
data2.head(10)
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 90.20 | 10.00 | 7.58 | 44.90 | 1610 | 9.44 | 56.20 | 5.82 | 553 | 1 |
| 1 | Albania | 16.60 | 28.00 | 6.55 | 48.60 | 9930 | 4.49 | 76.30 | 1.65 | 4090 | 1 |
| 2 | Algeria | 27.30 | 38.40 | 4.17 | 31.40 | 12900 | 16.10 | 76.50 | 2.89 | 4460 | 0 |
| 3 | Angola | 119.00 | 62.30 | 2.85 | 42.90 | 5900 | 22.40 | 60.10 | 6.16 | 3530 | 1 |
| 4 | Antigua and Barbuda | 10.30 | 45.50 | 6.03 | 58.90 | 19100 | 1.44 | 76.80 | 2.13 | 12200 | 0 |
| 5 | Argentina | 14.50 | 18.90 | 8.10 | 16.00 | 18700 | 20.90 | 75.80 | 2.37 | 10300 | 0 |
| 6 | Armenia | 18.10 | 20.80 | 4.40 | 45.30 | 6700 | 7.77 | 73.30 | 1.69 | 3220 | 1 |
| 9 | Azerbaijan | 39.20 | 54.30 | 5.88 | 20.70 | 16000 | 13.80 | 69.10 | 1.92 | 5840 | 0 |
| 11 | Bahrain | 8.60 | 69.50 | 4.97 | 50.90 | 41100 | 7.44 | 76.00 | 2.16 | 20700 | 2 |
| 12 | Bangladesh | 49.40 | 16.00 | 3.52 | 21.80 | 2440 | 7.14 | 70.40 | 2.33 | 758 | 1 |
data2['Cluster']=y
data2
C:\Users\BlueShift\AppData\Local\Temp\ipykernel_7816\498247848.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 90.20 | 10.00 | 7.58 | 44.90 | 1610 | 9.44 | 56.20 | 5.82 | 553 | 1 |
| 1 | Albania | 16.60 | 28.00 | 6.55 | 48.60 | 9930 | 4.49 | 76.30 | 1.65 | 4090 | 1 |
| 2 | Algeria | 27.30 | 38.40 | 4.17 | 31.40 | 12900 | 16.10 | 76.50 | 2.89 | 4460 | 0 |
| 3 | Angola | 119.00 | 62.30 | 2.85 | 42.90 | 5900 | 22.40 | 60.10 | 6.16 | 3530 | 1 |
| 4 | Antigua and Barbuda | 10.30 | 45.50 | 6.03 | 58.90 | 19100 | 1.44 | 76.80 | 2.13 | 12200 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 162 | Vanuatu | 29.20 | 46.60 | 5.25 | 52.70 | 2950 | 2.62 | 63.00 | 3.50 | 2970 | 1 |
| 163 | Venezuela | 17.10 | 28.50 | 4.91 | 17.60 | 16500 | 45.90 | 75.40 | 2.47 | 13500 | 0 |
| 164 | Vietnam | 23.30 | 72.00 | 6.84 | 80.20 | 4490 | 12.10 | 73.10 | 1.95 | 1310 | 1 |
| 165 | Yemen | 56.30 | 30.00 | 5.18 | 34.40 | 4480 | 23.60 | 67.50 | 4.67 | 1310 | 1 |
| 166 | Zambia | 83.10 | 37.00 | 5.89 | 30.90 | 3280 | 14.00 | 52.00 | 5.40 | 1460 | 1 |
137 rows × 11 columns
### Clusters 0,1 e 2
df_0=data2[data2['Cluster']==0]
df_1=data2[data2['Cluster']==1]
df_2=data2[data2['Cluster']==2]
#Países que representa seus clusters
import plotly.express as px
fig = px.scatter(data2, x = 'Cluster', y = 'country')
fig.show()
#Cluster 2
df_2
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | Bahrain | 8.60 | 69.50 | 4.97 | 50.90 | 41100 | 7.44 | 76.00 | 2.16 | 20700 | 2 |
| 43 | Czech Republic | 3.40 | 66.00 | 7.88 | 62.90 | 28300 | -1.43 | 77.50 | 1.51 | 19800 | 2 |
| 49 | Equatorial Guinea | 111.00 | 85.80 | 4.48 | 58.90 | 33700 | 24.90 | 60.90 | 5.21 | 17100 | 2 |
| 89 | Libya | 16.60 | 65.60 | 3.88 | 42.10 | 29600 | 14.20 | 76.10 | 2.41 | 12100 | 2 |
| 98 | Malta | 6.80 | 153.00 | 8.65 | 154.00 | 28300 | 3.83 | 80.30 | 1.36 | 21100 | 2 |
| 115 | Oman | 11.70 | 65.70 | 2.77 | 41.20 | 45300 | 15.60 | 76.10 | 2.90 | 19300 | 2 |
| 122 | Portugal | 3.90 | 29.90 | 11.00 | 37.40 | 27200 | 0.64 | 79.80 | 1.39 | 22500 | 2 |
| 128 | Saudi Arabia | 15.70 | 49.60 | 4.29 | 33.00 | 45400 | 17.20 | 75.10 | 2.96 | 19300 | 2 |
| 134 | Slovak Republic | 7.00 | 76.30 | 8.79 | 77.80 | 25200 | 0.48 | 75.50 | 1.43 | 16600 | 2 |
| 135 | Slovenia | 3.20 | 64.30 | 9.41 | 62.90 | 28700 | -0.99 | 79.50 | 1.57 | 23400 | 2 |
| 138 | South Korea | 4.10 | 49.40 | 6.93 | 46.20 | 30400 | 3.16 | 80.10 | 1.23 | 22100 | 2 |
#Cluster 1
df_1
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 90.20 | 10.00 | 7.58 | 44.90 | 1610 | 9.44 | 56.20 | 5.82 | 553 | 1 |
| 1 | Albania | 16.60 | 28.00 | 6.55 | 48.60 | 9930 | 4.49 | 76.30 | 1.65 | 4090 | 1 |
| 3 | Angola | 119.00 | 62.30 | 2.85 | 42.90 | 5900 | 22.40 | 60.10 | 6.16 | 3530 | 1 |
| 6 | Armenia | 18.10 | 20.80 | 4.40 | 45.30 | 6700 | 7.77 | 73.30 | 1.69 | 3220 | 1 |
| 12 | Bangladesh | 49.40 | 16.00 | 3.52 | 21.80 | 2440 | 7.14 | 70.40 | 2.33 | 758 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 161 | Uzbekistan | 36.30 | 31.70 | 5.81 | 28.50 | 4240 | 16.50 | 68.80 | 2.34 | 1380 | 1 |
| 162 | Vanuatu | 29.20 | 46.60 | 5.25 | 52.70 | 2950 | 2.62 | 63.00 | 3.50 | 2970 | 1 |
| 164 | Vietnam | 23.30 | 72.00 | 6.84 | 80.20 | 4490 | 12.10 | 73.10 | 1.95 | 1310 | 1 |
| 165 | Yemen | 56.30 | 30.00 | 5.18 | 34.40 | 4480 | 23.60 | 67.50 | 4.67 | 1310 | 1 |
| 166 | Zambia | 83.10 | 37.00 | 5.89 | 30.90 | 3280 | 14.00 | 52.00 | 5.40 | 1460 | 1 |
84 rows × 11 columns
#Cluster 0
df_0
| country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Algeria | 27.30 | 38.40 | 4.17 | 31.40 | 12900 | 16.10 | 76.50 | 2.89 | 4460 | 0 |
| 4 | Antigua and Barbuda | 10.30 | 45.50 | 6.03 | 58.90 | 19100 | 1.44 | 76.80 | 2.13 | 12200 | 0 |
| 5 | Argentina | 14.50 | 18.90 | 8.10 | 16.00 | 18700 | 20.90 | 75.80 | 2.37 | 10300 | 0 |
| 9 | Azerbaijan | 39.20 | 54.30 | 5.88 | 20.70 | 16000 | 13.80 | 69.10 | 1.92 | 5840 | 0 |
| 13 | Barbados | 14.20 | 39.50 | 7.97 | 48.70 | 15300 | 0.32 | 76.70 | 1.78 | 16000 | 0 |
| 14 | Belarus | 5.50 | 51.40 | 5.61 | 64.50 | 16200 | 15.10 | 70.40 | 1.49 | 6030 | 0 |
| 21 | Botswana | 52.50 | 43.60 | 8.30 | 51.30 | 13300 | 8.92 | 57.10 | 2.88 | 6350 | 0 |
| 22 | Brazil | 19.80 | 10.70 | 9.01 | 11.80 | 14500 | 8.41 | 74.20 | 1.80 | 11200 | 0 |
| 24 | Bulgaria | 10.80 | 50.20 | 6.87 | 53.00 | 15300 | 1.11 | 73.90 | 1.57 | 6840 | 0 |
| 33 | Chile | 8.70 | 37.70 | 7.96 | 31.30 | 19400 | 8.96 | 79.10 | 1.88 | 12900 | 0 |
| 35 | Colombia | 18.60 | 15.90 | 7.59 | 17.80 | 10900 | 3.86 | 76.40 | 2.01 | 6250 | 0 |
| 39 | Costa Rica | 10.20 | 33.20 | 10.90 | 35.00 | 13000 | 6.57 | 80.40 | 1.92 | 8200 | 0 |
| 41 | Croatia | 5.50 | 37.60 | 7.76 | 38.10 | 20100 | 0.82 | 76.30 | 1.55 | 13500 | 0 |
| 45 | Dominican Republic | 34.40 | 22.70 | 6.22 | 33.30 | 11100 | 5.44 | 74.60 | 2.60 | 5450 | 0 |
| 51 | Estonia | 4.50 | 75.10 | 6.03 | 68.70 | 22700 | 1.74 | 76.00 | 1.72 | 14600 | 0 |
| 55 | Gabon | 63.70 | 57.70 | 3.50 | 18.90 | 15400 | 16.60 | 62.90 | 4.08 | 8750 | 0 |
| 61 | Grenada | 14.60 | 23.80 | 5.86 | 49.20 | 11200 | 0.48 | 71.30 | 2.24 | 7370 | 0 |
| 67 | Hungary | 6.00 | 81.80 | 7.33 | 76.50 | 22300 | 2.33 | 74.50 | 1.25 | 13100 | 0 |
| 71 | Iran | 19.30 | 24.40 | 5.60 | 19.40 | 17400 | 15.90 | 74.50 | 1.76 | 6530 | 0 |
| 72 | Iraq | 36.90 | 39.40 | 8.41 | 34.10 | 12700 | 16.60 | 67.20 | 4.56 | 4500 | 0 |
| 79 | Kazakhstan | 21.50 | 44.20 | 4.29 | 29.90 | 20100 | 19.50 | 68.40 | 2.60 | 9070 | 0 |
| 85 | Latvia | 7.80 | 53.70 | 6.68 | 55.10 | 18300 | -0.81 | 73.10 | 1.36 | 11300 | 0 |
| 86 | Lebanon | 10.30 | 35.80 | 7.03 | 60.20 | 16300 | 0.24 | 79.80 | 1.61 | 8860 | 0 |
| 90 | Lithuania | 6.10 | 65.30 | 7.04 | 67.20 | 21100 | 2.38 | 73.20 | 1.50 | 12000 | 0 |
| 92 | Macedonia, FYR | 10.40 | 39.80 | 7.09 | 58.10 | 11400 | 2.04 | 74.00 | 1.47 | 4540 | 0 |
| 95 | Malaysia | 7.90 | 86.90 | 4.39 | 71.00 | 21100 | 7.27 | 74.50 | 2.15 | 9070 | 0 |
| 96 | Maldives | 13.20 | 77.60 | 6.33 | 65.40 | 10500 | 2.88 | 77.90 | 2.23 | 7100 | 0 |
| 100 | Mauritius | 15.00 | 51.20 | 6.00 | 62.20 | 15900 | 1.13 | 73.40 | 1.57 | 8000 | 0 |
| 104 | Montenegro | 6.80 | 37.00 | 9.11 | 62.70 | 14000 | 1.60 | 76.40 | 1.77 | 6680 | 0 |
| 117 | Panama | 19.70 | 70.00 | 8.10 | 78.20 | 15400 | 2.59 | 77.80 | 2.62 | 8080 | 0 |
| 121 | Poland | 6.00 | 40.10 | 7.46 | 42.10 | 21800 | 1.66 | 76.30 | 1.41 | 12600 | 0 |
| 124 | Romania | 11.50 | 32.60 | 5.58 | 38.80 | 17800 | 3.53 | 73.70 | 1.59 | 8230 | 0 |
| 125 | Russia | 10.00 | 29.20 | 5.08 | 21.10 | 23100 | 14.20 | 69.20 | 1.57 | 10700 | 0 |
| 130 | Serbia | 7.60 | 32.90 | 10.40 | 47.90 | 12700 | 5.88 | 74.70 | 1.40 | 5410 | 0 |
| 131 | Seychelles | 14.40 | 93.80 | 3.40 | 108.00 | 20400 | -4.21 | 73.40 | 2.17 | 10800 | 0 |
| 137 | South Africa | 53.70 | 28.60 | 8.94 | 27.40 | 12000 | 6.35 | 54.30 | 2.59 | 7280 | 0 |
| 141 | St. Vincent and the Grenadines | 20.70 | 26.90 | 4.47 | 57.10 | 9920 | 4.44 | 71.60 | 2.07 | 6230 | 0 |
| 143 | Suriname | 24.10 | 52.50 | 7.01 | 38.40 | 14200 | 7.20 | 70.30 | 2.52 | 8300 | 0 |
| 148 | Thailand | 14.90 | 66.50 | 3.88 | 60.80 | 13500 | 4.08 | 76.60 | 1.55 | 5080 | 0 |
| 153 | Turkey | 19.10 | 20.40 | 6.74 | 25.50 | 18000 | 7.01 | 78.20 | 2.15 | 10700 | 0 |
| 160 | Uruguay | 10.60 | 26.30 | 8.35 | 25.40 | 17100 | 4.91 | 76.40 | 2.08 | 11900 | 0 |
| 163 | Venezuela | 17.10 | 28.50 | 4.91 | 17.60 | 16500 | 45.90 | 75.40 | 2.47 | 13500 | 0 |
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='Cluster',
title='Agrupamento dos Cluster',
)
fig.show()
data2.groupby('Cluster').mean()
C:\Users\BlueShift\AppData\Local\Temp\ipykernel_7816\1341286259.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
| child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | |
|---|---|---|---|---|---|---|---|---|---|
| Cluster | |||||||||
| 0 | 17.74 | 43.85 | 6.70 | 45.21 | 16157.62 | 7.27 | 73.39 | 2.07 | 8947.62 |
| 1 | 63.06 | 31.04 | 6.15 | 45.94 | 4326.70 | 9.84 | 64.93 | 3.89 | 1947.79 |
| 2 | 17.45 | 70.46 | 6.64 | 60.66 | 33018.18 | 7.73 | 76.08 | 2.19 | 19454.55 |
fig = px.choropleth(df_2,
locationmode='country names',
locations='country',
color='Cluster',
title='Cluster 2',
)
fig.show()
## Agrupamento do cluster 2, são os países com alto indice de Renda
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='income',
title='Indice de Renda para cluster 2 '
)
fig.show()
#Agrupamento do cluster 1, Indice de gdpp
fig = px.choropleth(df_1,
locationmode='country names',
locations='country',
color='Cluster',
title='Cluster 1',
)
fig.show()
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='gdpp',
title='Indice de gdp para cluster 1 '
)
fig.show()
#GDP
#valor de bens e serviços produzidos pela economia do país menos o valor dos bens e serviços usados na produção
#Agrupamento do cluster 0, Indice de gdpp
fig = px.choropleth(df_0,
locationmode='country names',
locations='country',
color='Cluster',
title='Cluster 0',
)
fig.show()
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='child_mort',
title='Indice de morte infantil para cluster 0 '
)
fig.show()
pd.set_option("display.float", "{:.2f}".format)
df_0.describe()
| child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 42.00 | 42.00 | 42.00 | 42.00 | 42.00 | 42.00 | 42.00 | 42.00 | 42.00 | 42.00 |
| mean | 17.74 | 43.85 | 6.70 | 45.21 | 16157.62 | 7.27 | 73.39 | 2.07 | 8947.62 | 0.00 |
| std | 13.77 | 20.00 | 1.78 | 21.26 | 3694.73 | 8.66 | 5.32 | 0.67 | 3068.03 | 0.00 |
| min | 4.50 | 10.70 | 3.40 | 11.80 | 9920.00 | -4.21 | 54.30 | 1.25 | 4460.00 | 0.00 |
| 25% | 9.02 | 28.75 | 5.60 | 28.02 | 13075.00 | 1.68 | 71.97 | 1.57 | 6395.00 | 0.00 |
| 50% | 14.30 | 39.45 | 6.80 | 45.00 | 15950.00 | 4.68 | 74.50 | 1.92 | 8265.00 | 0.00 |
| 75% | 19.77 | 53.40 | 7.97 | 60.65 | 19000.00 | 8.95 | 76.40 | 2.34 | 11275.00 | 0.00 |
| max | 63.70 | 93.80 | 10.90 | 108.00 | 23100.00 | 45.90 | 80.40 | 4.56 | 16000.00 | 0.00 |
p = df_0.hist(figsize = (10,12))
pd.set_option("display.float", "{:.2f}".format)
df_1.describe()
| child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 84.00 | 84.00 | 84.00 | 84.00 | 84.00 | 84.00 | 84.00 | 84.00 | 84.00 | 84.00 |
| mean | 63.06 | 31.04 | 6.15 | 45.94 | 4326.70 | 9.84 | 64.93 | 3.89 | 1947.79 | 1.00 |
| std | 41.84 | 16.82 | 2.57 | 18.98 | 2949.02 | 12.53 | 8.04 | 1.52 | 1421.81 | 0.00 |
| min | 6.90 | 0.11 | 1.97 | 0.07 | 609.00 | 0.51 | 32.10 | 1.27 | 231.00 | 1.00 |
| 25% | 26.40 | 19.50 | 4.73 | 32.02 | 1767.50 | 3.77 | 59.45 | 2.63 | 706.50 | 1.00 |
| 50% | 57.55 | 27.85 | 5.34 | 44.00 | 3355.00 | 6.99 | 65.70 | 3.63 | 1330.00 | 1.00 |
| 75% | 90.35 | 41.12 | 6.91 | 56.85 | 6702.50 | 12.10 | 71.15 | 5.13 | 3002.50 | 1.00 |
| max | 208.00 | 85.10 | 14.20 | 101.00 | 10400.00 | 104.00 | 77.90 | 7.49 | 5190.00 | 1.00 |
p = df_1.hist(figsize = (10,12))
pd.set_option("display.float", "{:.2f}".format)
df_2.describe()
| child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 11.00 | 11.00 | 11.00 | 11.00 | 11.00 | 11.00 | 11.00 | 11.00 | 11.00 | 11.00 |
| mean | 17.45 | 70.46 | 6.64 | 60.66 | 33018.18 | 7.73 | 76.08 | 2.19 | 19454.55 | 2.00 |
| std | 31.39 | 31.16 | 2.69 | 33.69 | 7393.35 | 8.87 | 5.42 | 1.18 | 3225.64 | 0.00 |
| min | 3.20 | 29.90 | 2.77 | 33.00 | 25200.00 | -1.43 | 60.90 | 1.23 | 12100.00 | 2.00 |
| 25% | 4.00 | 56.95 | 4.38 | 41.65 | 28300.00 | 0.56 | 75.75 | 1.41 | 18200.00 | 2.00 |
| 50% | 7.00 | 65.70 | 6.93 | 50.90 | 29600.00 | 3.83 | 76.10 | 1.57 | 19800.00 | 2.00 |
| 75% | 13.70 | 72.90 | 8.72 | 62.90 | 37400.00 | 14.90 | 79.65 | 2.66 | 21600.00 | 2.00 |
| max | 111.00 | 153.00 | 11.00 | 154.00 | 45400.00 | 24.90 | 80.30 | 5.21 | 23400.00 | 2.00 |
p = df_2.hist(figsize = (10,12))
df = data2.set_index('country')
normalizedd_df = (df-df.mean())/df.std()
normalizedd_df
| child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|
| country | ||||||||||
| Afghanistan | 1.09 | -1.27 | 0.52 | -0.09 | -0.92 | 0.05 | -1.46 | 1.69 | -0.87 | 0.39 |
| Albania | -0.70 | -0.46 | 0.08 | 0.08 | -0.03 | -0.39 | 0.94 | -0.99 | -0.25 | 0.39 |
| Algeria | -0.44 | 0.01 | -0.92 | -0.73 | 0.28 | 0.64 | 0.97 | -0.20 | -0.18 | -1.33 |
| Angola | 1.79 | 1.09 | -1.48 | -0.19 | -0.46 | 1.21 | -0.99 | 1.90 | -0.35 | 0.39 |
| Antigua and Barbuda | -0.86 | 0.33 | -0.14 | 0.56 | 0.94 | -0.66 | 1.00 | -0.69 | 1.18 | -1.33 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Vanuatu | -0.40 | 0.38 | -0.47 | 0.27 | -0.78 | -0.56 | -0.65 | 0.19 | -0.45 | 0.39 |
| Venezuela | -0.69 | -0.43 | -0.61 | -1.37 | 0.67 | 3.30 | 0.83 | -0.47 | 1.41 | -1.33 |
| Vietnam | -0.54 | 1.53 | 0.20 | 1.56 | -0.62 | 0.29 | 0.56 | -0.80 | -0.74 | 0.39 |
| Yemen | 0.26 | -0.37 | -0.50 | -0.59 | -0.62 | 1.31 | -0.11 | 0.95 | -0.74 | 0.39 |
| Zambia | 0.91 | -0.05 | -0.20 | -0.75 | -0.74 | 0.46 | -1.96 | 1.42 | -0.71 | 0.39 |
137 rows × 10 columns
plt.figure(figsize=(14, 5))
plt.grid(False)
dendrogram = sch.dendrogram(sch.linkage(df, method='ward'), labels=df.index)
plt.title('Dendrogram')
plt.ylabel('Euclidean Distance')
Text(0, 0.5, 'Euclidean Distance')
fig = px.choropleth(data2,
locationmode='country names',
locations='country',
color='Cluster',
title='Agrupamento dos Cluster Kmeas',
)
fig.show()
.
O agrupamento k-means, os dados são separados em k clusters, geralmente têm que está em forma de vetores númericos. O algoritmo calcula a média de um conjunto de pontos de dados e a distância euclidiana entre eles.
Associar cada objeto de dados a seu centróide mais próximo
O agrupamento inicial é feito atrirbuindo cada dado, objeto ao centróide que está tão próximo a ele e a primeira iteração é concluida. O algoritmo funciona em iteraçoes até que os objetos não mudem seus centros de cluster. Os centróides movem suas posições até que os critérios de convergência sejam alcançados.
KMobj = KMedoids(n_clusters=3).fit(df)
labels = KMobj.labels_
unq_lab = set(labels)
colors_plot = [
plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unq_lab))
]
for k, col in zip(unq_lab, colors_plot):
class_member_mask = labels == k
xy = df[class_member_mask].values
plt.plot(
xy[:, 0],
xy[:, 1],
'o',
markerfacecolor=tuple(col),
markeredgecolor='white',
markersize=10,
);
plt.plot(
KMobj.cluster_centers_[:, 0],
KMobj.cluster_centers_[:, 1],
'o',
markerfacecolor='orange',
markeredgecolor='k',
markersize=10,
);
plt.title('Agrupamento de KMedoids - Medoids são representados em Laranja.', fontsize=14);